#####################################################
## ** REPLICATION CODE **                          ##
## Yuri M. Zhukov                                  ##
## "Roads and the Diffusion of Insurgent Violence" ##
## Political Geography 31, no. 3 (March 2012)      ##
##                                                 ##
## For information on variable names, coding, etc. ##
## see technical appendix ("DataAppendix.pdf")     ##
#####################################################

#####################################
## Load libraries and data objects ##
#####################################

# Clear workspace
rm(list=ls())

# Set working directory
setwd("/mydirectory")

## Load libraries
options("repos"=c(CRAN="@CRAN@"))
#install.packages("fields")
library(fields)
#install.packages("mgcv")
library(mgcv)
#install.packages("classInt")
library(classInt)
#install.packages("maptools")
library(maptools)
#install.packages("MASS")
library(MASS)
#install.packages("mvtnorm")
library(mvtnorm)
#install.packages("xtable")
library(xtable)
source("Functions.R")

## Load Data
load("Data.RData")

## Load network matrices
# Road
load("RoadNet1.RData")
load("RoadNet2.RData")
roadnet <- rbind(roadnet1,roadnet2)
rm(roadnet1,roadnet2)

# Geodesic
load("GeoNet1.RData")
load("GeoNet2.RData")
geonet <- rbind(geonet1,geonet2)
rm(geonet1,geonet2)

## Load maps
load("MapData.RData")


###################################
## Figure 2 (correlation matrix) ##
###################################

## Select variables to include in matrix
Xs.main <- data[,c("REBEL.b","D.REBEL.ROAD.t1","D.REBEL.GEO.t1","GOV_MOP.b.t1","ROAD_5KM","POP","ELEVATION","IV_MOPUP","ELECTIONS","DIST_KM")]
Xs.main <- na.omit(Xs.main)

## Define labels corresponding to each variable
labels <- c("Insurgent Violence","Road Distance from Attack","Geo. Distance from Attack","Mop-Up","Road Access (5km)","Population Density","Elevation","Mop-Up (IV)","Election Cycle","Road Distance to Base")

## Create numerical correlation matrix
mat.cor <- round(cor(Xs.main),2)
rownames(mat.cor) <- labels
colnames(mat.cor) <- rownames(mat.cor) 

## Create color palette
pal <- c("darkblue","blue","lightblue","lightblue1","grey95","beige","lightgoldenrod1","orange","red","darkred")
cols.mat <- matrix(NA,nrow=nrow(mat.cor),ncol=ncol(mat.cor))
cols.mat <- ifelse(mat.cor>=-1&mat.cor<(-.8),pal[1],
ifelse(mat.cor>=-(.8)&mat.cor<(-.6),pal[2],
ifelse(mat.cor>=-(.6)&mat.cor<(-.4),pal[3],
ifelse(mat.cor>=-(.4)&mat.cor<(-.2),pal[4],
ifelse(mat.cor>=-(.2)&mat.cor<(0),pal[5],
ifelse(mat.cor>=-(0)&mat.cor<(.2),pal[6],
ifelse(mat.cor>=-(.2)&mat.cor<(.4),pal[7],
ifelse(mat.cor>=-(.4)&mat.cor<(.6),pal[8],
ifelse(mat.cor>=-(.6)&mat.cor<(.8),pal[9],
pal[10])))))))))
cols.mat <- cols.mat[ nrow(cols.mat):1,]

## Create grid that distinguishes negative vs. positive values
pchs.mat <- matrix(NA,nrow=nrow(mat.cor),ncol=ncol(mat.cor))
pchs.mat <- ifelse(mat.cor<0,4,NA)
pchs.mat <- pchs.mat[ nrow(pchs.mat):1,]

## Plot it! (will save to PDF in working directory)
pdf("Figure2.pdf",width=4,height=4)
par(mar=c(0,0,0,0))
plot(c(-8,nrow(mat.cor)+1), c(0,nrow(mat.cor)+3), type = "n", xlab="", ylab="", asp = 1,axes=F)
ups <- upper.tri(cols.mat,diag=T)[ncol(cols.mat):1,]
cols.mat[which(ups)] <- NA
ups <- upper.tri(pchs.mat,diag=T)[ncol(pchs.mat):1,]
pchs.mat[which(ups)] <- NA
for(i in 1:nrow(cols.mat)){for(j in 1:ncol(cols.mat)){points(x=i,y=j,cex=2.4,col=cols.mat[j,i],pch=15)
points(x=i,y=j,cex=2.4,col="grey",pch=pchs.mat[j,i])}}
text(x=0.5,y=ncol(cols.mat):1,pos=2,labels = c(NA,labels[-1]),cex=.7)
text(x=1:(ncol(cols.mat))-.35,y=ncol(cols.mat):1,pos=4,labels = c(labels[-length(labels)],NA),cex=.75, srt = 90)
legend.v2(x="bottom",cex=.7,fill=pal,legend=c("[-1, -.8)","[-.8, -.6)","[-.6, -.4)","[-.4, -.2)","[-.2, 0)","[0, .2)","[.2, .4)","[.4, .6)","[.6, .8)","[.8, 1]"),title="Correlation",ncol=5,pch=c(4,4,4,4,4,NA,NA,NA,NA,NA),bty="n",col=c(rep("darkgrey",5),rep(NA,5)))
dev.off()


#####################################
## Table 2 (GAM regression models) ##
#####################################


## Model 1 (no network)
mod1 <- gam(REBEL.b~ROAD_5KM+POP+ELEVATION+GOV_MOP.b.t1+REBEL.b.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_GOV_MOP.b.t1+s(LONG,LAT),data=data,family=binomial)
summary(mod1)

## Model 2 (geodesic network)
mod2 <- gam(REBEL.b~D.REBEL.GEO.t1+ROAD_5KM+POP+ELEVATION+GOV_MOP.b.t1+REBEL.b.t1+R_D.REBEL.GEO.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_GOV_MOP.b.t1+s(LONG,LAT),data=data,family=binomial)
summary(mod2)

## Model 3 (road network)
mod3 <- gam(REBEL.b~D.REBEL.ROAD.t1+ROAD_5KM+POP+ELEVATION+GOV_MOP.b.t1+REBEL.b.t1+R_D.REBEL.ROAD.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_GOV_MOP.b.t1+s(LONG,LAT),data=data,family=binomial)
summary(mod3)

## Convert to same format as in paper (new vs. recurring)
Tab1 <- CoefTable(mod1,5)
Tab2 <- CoefTable(mod2,5)
Tab3 <- CoefTable(mod3,5)
Tab1 <- rbind(Tab1[1:2,],rep(NA,3),rep(NA,3),rep(NA,3),rep(NA,3),Tab1[3:nrow(Tab1),])
Tab2 <- rbind(Tab2[1:4,],rep(NA,3),rep(NA,3),Tab2[5:nrow(Tab2),])
Tab3 <- rbind(Tab3[1:2,],rep(NA,3),rep(NA,3),Tab3[3:nrow(Tab3),])
Table <- cbind(Tab1,Tab2[,-1],Tab3[,-1])
row.names(Table) <- NULL
Table <- as.data.frame(Table)
Table[,1] <- as.character(Table[,1])
Table[3:6,1] <- c("D.REBEL.GEO.t1","","D.REBEL.ROAD.t1","")
Table




###########################################################
## Figure 3 (distance from attack and prob. of violence) ##
###########################################################


## Generate predited values
preds <- dist2road.cond(mod3)

## Plot it! (will save to PDF in working directory)
pdf("Figure3.pdf",width=8,height=3)
# No Attack
par(mfrow=c(1,2))
par(mar=c(4,4,.5,.5))
plot(y=preds$Y0,x=as.numeric(preds$X[,1]),type="l",ylim=c(0,max(preds$Y0.u)),xlab="Road Distance from Nearest Incident at t-1 (km)",ylab="Probability of New Violence",lwd=2,cex.lab=.9,col="darkgrey",xlim=c(0,750))
segments(x0=preds$X[,1],x1=preds$X[,1],y0=preds$Y0.u,y1=preds$Y0.l,col="grey")
lines(y=preds$Y0,x=preds$X[,1],type="l",lwd=2)
rug(jitter(sample(unique(data$D.REBEL.ROAD.t1),10000)))
legend(x="topright",col=c("black",NA),legend=c(expression(plain(Pr)(P%->%V)),"95% CI"),lwd=2,bty="n",cex=.7)
segments(x0=540:600,x1=540:600,y0=.0128,y1=.0136,col="grey")
# Attack
par(mar=c(4,4,.5,.5))
plot(y=preds$Y1,x=as.numeric(preds$X[,1]),type="l",ylim=c(min(preds$Y0.l),max(preds$Y1.u)),xlab="Road Distance from Nearest Incident at t-1 (km)",ylab="Probability of Recurring Violence",lwd=2,cex.lab=.9,col="darkgrey",xlim=c(0,750))
segments(x0=preds$X[,1],x1=preds$X[,1],y0=preds$Y1.u,y1=preds$Y1.l,col="grey")
lines(y=preds$Y1,x=preds$X[,1],type="l",lwd=2)
rug(jitter(unique(data$R_D.REBEL.ROAD.t1)))
legend(x="bottomright",col=c("black",NA),legend=c(expression(plain(Pr)(V%->%V)),"95% CI"),lwd=2,bty="n",cex=.7)
segments(x0=540:600,x1=540:600,y0=.06,y1=.02,col="grey")
dev.off()



###########################################
## Figure 4 (road vs geodesic distances) ##
###########################################

## Calculate kernal densities
roadnet <- as.matrix(roadnet)
geonet <- as.matrix(geonet)
dens.r <- density(roadnet)
dens.g <- density(geonet)

## Plot it! (will save to PDF in working directory)
pdf("Figure4.pdf",width=4,height=3)
par(mar=c(4,4,.5,.5))
plot(dens.r,ylim=c(0,max(dens.g$y,dens.r$y)), main="",xlab="Distance between villages (km)",ylab="Kernel Density",col="blue")
lines(dens.g,col="red",lwd=1.5,lty=2)
legend(x="topright",lty=c(1,2),col=c("blue","red"),lwd=c(1,2),legend=c("Road network","Geodesic"),cex=.8,bty="n")
dev.off()



####################################
## Figure 5 (outbreak simulation) ##
####################################

# Extract village names
months <- 200800+1:12
X <- subset(data,data$YRMO==200812)
NAMES <- c()
for(i in 1:length(X$NAME)){NAMES[i] <- .simpleCap(as.character(X$NAME[i]))}
NAMES <- gsub("?","",NAMES,fixed=T)

# Choose initial outbreak location
city <- "Groznyj"

## Plot it! (will save to PDF in working directory)
pdf(file="Figure5.pdf",height=4.5,width=6.5)
layout(matrix(c(1,3,5,7,1,3,5,7,1,3,5,7,1,3,5,7,2,4,6,8), 4, 5, byrow = FALSE),heights=c(1,1,1,.5))
# Main plot
simMap.cond(mod=mod1,data=data,W=roadnet,city=city,title="No Network",box.col="blue")
simMap.cond(mod=mod2,data=data,W=geonet,city=city,title="Geodesic Distance",box.col="firebrick2")
simMap.cond(mod=mod3,data=data,W=roadnet,city=city,title="Road Network",box.col="green4")
# Legend
par(mar=c(.5,.05,.05,10),xpd=T)
plot(x=0,y=0,col=NA,cex=1.1,pch=16,axes=F,xlim=c(min(X$LONG),max(X$LONG)),ylim=c(min(X$LAT),max(X$LAT)),xlab="",ylab="")
legend(x=min(X$LAT)+3,y=median(X$LONG)-1.5,cex=1,pch=c(1,NA),lty=c(NA,1),col=c("black","chocolate1"),legend=c("Outbreak location (t - 1)","Roads"),bty="n")
scalebar(loc=c(min(X$LAT)-2.5,median(X$LONG)-3.5),size=.8,length=km2d(500),unit="km",cex=5)
xlim <- c(min(X$LAT),max(X$LAT))
ylim <- c(min(X$LONG),max(X$LONG))
brks <- round(seq(min(elevation[[1]]),max(elevation[[1]]),length.out=13),0)
notchs <- seq(xlim[2]+(xlim[2]-xlim[1])*1.2,xlim[2]+(xlim[2]-xlim[1])*1.9,length.out=13)
text(x=notchs[13]-(notchs[13]-notchs[1])*.6,y=ylim[1]+(ylim[2]-ylim[1])*.55,labels="Elevation",cex=1,pos=4)
for (i in 1:12) rect(notchs[i],ylim[1]+(ylim[2]-ylim[1])*.5,notchs[i+1],ylim[1]+(ylim[2]-ylim[1])*.4,col=terrain.colors(12)[i])
text(x=notchs[2],y=ylim[1]+(ylim[2]-ylim[1])*.33,labels=brks[1],cex=.6)
text(x=notchs[7],y=ylim[1]+(ylim[2]-ylim[1])*0.33,labels=brks[7],cex=.6)
text(x=notchs[12],y=ylim[1]+(ylim[2]-ylim[1])*0.33,labels=paste(brks[13],"m"),cex=.6)
v1 <- which(NAMES==city)
window <- .5
xlim <- X$LONG[v1]+c(-window,window)
ylim <- X$LAT[v1]+c(-window,window)
par(mar=c(.1,.1,.1,.1),xpd=F)
plot(x=0,y=0,col=NA,cex=1.1,pch=16,axes=F,xlim=xlim,ylim=ylim)
scalebar(loc=c(xlim[1]+.05,mean(ylim)-.075),size=1,length=km2d(100),unit="km",cex=5)
dev.off()



###########################
## Table 3 (regional FE) ##
###########################

## Model 4 (no network)
mod4 <- gam(REBEL.b~ROAD_5KM+POP+ELEVATION+GOV_MOP.b.t1+REBEL.b.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_GOV_MOP.b.t1+s(LONG,LAT)+OBLAST_NAM,data=data,family=binomial)
summary(mod4)

## Model 5 (geodesic network)
mod5 <- gam(REBEL.b~D.REBEL.GEO.t1+ROAD_5KM+POP+ELEVATION+GOV_MOP.b.t1+REBEL.b.t1+R_D.REBEL.GEO.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_GOV_MOP.b.t1+s(LONG,LAT)+OBLAST_NAM,data=data,family=binomial)
summary(mod5)

## Model 6 (road network)
mod6 <- gam(REBEL.b~D.REBEL.ROAD.t1+ROAD_5KM+POP+ELEVATION+GOV_MOP.b.t1+REBEL.b.t1+R_D.REBEL.ROAD.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_GOV_MOP.b.t1+s(LONG,LAT)+OBLAST_NAM,data=data,family=binomial)
summary(mod6)


## Convert to same format as in paper (new vs. recurring)
Tab1 <- CoefTableFE(mod4,5)
Tab2 <- CoefTableFE(mod5,5)
Tab3 <- CoefTableFE(mod6,5)
Tab1 <- rbind(Tab1[1:2,],rep(NA,3),rep(NA,3),rep(NA,3),rep(NA,3),Tab1[3:nrow(Tab1),])
Tab2 <- rbind(Tab2[1:4,],rep(NA,3),rep(NA,3),Tab2[5:nrow(Tab2),])
Tab3 <- rbind(Tab3[1:2,],rep(NA,3),rep(NA,3),Tab3[3:nrow(Tab3),])
Table <- cbind(Tab1,Tab2[,-1],Tab3[,-1])
row.names(Table) <- NULL
Table <- as.data.frame(Table)
Table[,1] <- as.character(Table[,1])
Table[3:6,1] <- c("D.REBEL.GEO.t1","","D.REBEL.ROAD.t1","")
Table



############################
## Table 4 (1st stage IV) ##
############################

## Model 7 (no network)
mod7 <- gam(GOV_MOP.b ~ ELECTIONS + DIST_KM + ROAD_5KM+POP+ELEVATION + REBEL.b.t1, data=data, family=binomial)
summary(mod7)
IV. <- predict.gam(mod7,newdata=data,type="response")
data$IV_MOPUP <- IV.
data$R_IV_MOPUP <- data$REBEL.b.t1*IV.

## Model 8 (geodesic network)
mod8 <- gam(GOV_MOP.b ~ ELECTIONS + DIST_KM + D.REBEL.GEO.t1+ROAD_5KM+POP+ELEVATION + REBEL.b.t1, data=data, family=binomial)
summary(mod8)
IV. <- predict.gam(IV,newdata=data,type="response")
data$IV_MOPUP.g <- IV.
data$R_IV_MOPUP.g <- data$REBEL.b.t1*IV.

## Model 9 (road network)
mod9 <- gam(GOV_MOP.b ~ ELECTIONS + DIST_KM + D.REBEL.ROAD.t1+ROAD_5KM+POP+ELEVATION + REBEL.b.t1, data=data, family=binomial)
summary(mod9)
IV. <- predict.gam(IV,newdata=data,type="response")
data$IV_MOPUP.r <- IV.
data$R_IV_MOPUP.r <- data$REBEL.b.t1*IV.

## Regression table
outreg(list(mod7,mod8,mod9))


############################
## Table 5 (2nd stage IV) ##
############################

## IV regression
mod10 <- gam(REBEL.b~ROAD_5KM+POP+ELEVATION+IV_MOPUP+REBEL.b.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_IV_MOPUP+s(LONG,LAT),data=data,family=binomial)
summary(mod10)
mod11 <- gam(REBEL.b~D.REBEL.ROAD.t1+ROAD_5KM+POP+ELEVATION+IV_MOPUP.r+REBEL.b.t1+R_D.REBEL.ROAD.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_IV_MOPUP.r+s(LONG,LAT),data=data,family=binomial)
summary(mod11)
mod12 <- gam(REBEL.b~D.REBEL.GEO.t1+ROAD_5KM+POP+ELEVATION+IV_MOPUP.g+REBEL.b.t1+R_D.REBEL.GEO.t1+R_ROAD_5KM+R_POP+R_ELEVATION+R_IV_MOPUP.g+s(LONG,LAT),data=data,family=binomial)
summary(mod12)

## Convert to same format as in paper (new vs. recurring)
Tab1 <- CoefTable(mod10,5)
Tab2 <- CoefTable(mod11,5)
Tab3 <- CoefTable(mod12,5)
Tab1 <- rbind(Tab1[1:2,],rep(NA,3),rep(NA,3),rep(NA,3),rep(NA,3),Tab1[3:nrow(Tab1),])
Tab2 <- rbind(Tab2[1:4,],rep(NA,3),rep(NA,3),Tab2[5:nrow(Tab2),])
Tab3 <- rbind(Tab3[1:2,],rep(NA,3),rep(NA,3),Tab3[3:nrow(Tab3),])
Table <- cbind(Tab1,Tab2[,-1],Tab3[,-1])
row.names(Table) <- NULL
Table <- as.data.frame(Table)
Table[,1] <- as.character(Table[,1])
Table[3:6,1] <- c("D.REBEL.GEO.t1","","D.REBEL.ROAD.t1","")
Table
